# Direct outputs not eligible for public release

# GLOBAL SETTINGS --------------------------------------------------------------

options(
  scipen = 999,
  digits = 16,
  max.print = .Machine$integer.max,
  show.error.locations = TRUE,
  warn = 1
)

RNGkind("L'Ecuyer-CMRG")
seed <- 818675309L
set.seed(seed) # setting main seed

# PACKAGES ---------------------------------------------------------------------

library(data.table)

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table
setDTthreads(threads = 1)
options(datatable.print.class = TRUE, datatable.print.keys = TRUE)
# so that printing the data.table also shows the variable type on top

# BEGIN FILE -------------------------------------------------------------------

################################################################################
# Table 2.1 inputs -- part 1 (main characteristics)
################################################################################

# Read in annual summary for tax filers
years <- 1999:2016

tax_filer_dt <- list()
i <- 0L

for (year in years){

    i <- i + 1L

    tax_filer_dt[[i]] <-
        readRDS(
            sprintf(
                "~/summary-output/population-stats-and-quantile-shares-%s.rds",
                year
            )
        )
    tax_filer_dt[[i]] <- setDT(tax_filer_dt[[i]][[1]]) # first element of list
}

tax_filer_dt <- rbindlist(tax_filer_dt, use.names = TRUE)

# Construct a population-weighted mean across 1999 to 2016 (one row)

weights <- tax_filer_dt$count # will take a wtd avg by adults across tax years
mean_count <- mean(tax_filer_dt$count)
tax_filer_dt[, c("count", "tax_yr") := NULL]

tax_filer_dt <-
    tax_filer_dt[
        ,
        lapply(.SD, weighted.mean, w = weights),
        .SDcols = colnames(tax_filer_dt)
    ]

tax_filer_dt <-
    tax_filer_dt[
        ,
        .(
            mean_female,
            mean_age,
            mean_wages,
            mean_employment,
            mean_married,
            mean_home
        )
    ]
tax_filer_dt[, total_count := mean_count]
tax_filer_dt[, dt_name := "tax_filer_dt"]

# Housekeeping
weights <- NULL
mean_count <- NULL
rm(weights, mean_count)

# Produce a row from the winners data
ES_data_omitted_year <-
    readRDS("~/population-panel-data/stacked-lottery-data-omitted-year.rds")

unique_units <- length(unique(ES_data_omitted_year$tin))
total_treated_units <- dim(ES_data_omitted_year[treated == 1])[1]

# Construct cohort weights
ES_data_omitted_year[
    ,
    cohort_weight := sum(treated) / total_treated_units,
    by = .(ref_onset_time)
]

winner_dt <-
    ES_data_omitted_year[
        ,
        .(
            mean_female = weighted.mean(x = female, w = cohort_weight),
            mean_age = weighted.mean(x = age, w = cohort_weight),
            mean_wages = weighted.mean(x = db_w2_wages, w = cohort_weight),
            mean_employment = weighted.mean(x = has_w2_wages, w = cohort_weight),
            mean_married = weighted.mean(x = married_ch, w = cohort_weight),
            mean_home = weighted.mean(x = db_homeowner, w = cohort_weight),
            total_count = unique_units
        )
    ]
winner_dt[, dt_name := "winner_dt"]

summary_dt <- rbindlist(list(winner_dt, tax_filer_dt), use.names = TRUE)
saveRDS(
    summary_dt,
    "~/summary-output/covariates_all_winners_vs_tax_filer_dt.rds"
)

# Housekeeping
tax_filer_dt <- NULL
winner_dt <- NULL
summary_dt <- NULL
rm(tax_filer_dt, winner_dt, summary_dt)

################################################################################
# Table 2.1 inputs -- part 1 (relative quartile shares)
################################################################################

years <- 1999:2016

winner_quantiles <- list()
i <- 0L

for (year in years){

    i <- i + 1L

    winner_quantiles[[i]] <-
        readRDS(
            sprintf(
                "~/summary-output/population-stats-and-quantile-shares-%s.rds",
                year
            )
        )
    winner_quantiles[[i]] <- setDT(winner_quantiles[[i]][[2]]) # second element of list
}

winner_quantiles <- rbindlist(winner_quantiles, use.names = TRUE)

# sum up to quartiles

winner_quantiles[population_quantile < 0.25, population_quartile := 1]
winner_quantiles[
    population_quantile >= 0.25 & population_quantile < 0.50,
    population_quartile := 2
]
winner_quantiles[
    population_quantile >= 0.50 & population_quantile < 0.75,
    population_quartile := 3
]
winner_quantiles[population_quantile >= 0.75, population_quartile := 4]

winner_quartiles <-
    winner_quantiles[
        ,
        .(
            count = sum(count),
            total = sum(total)
        ),
        by = .(population_quartile, tax_yr)
    ]
winner_quartiles[, prop := count / total]

# And then just the average across years within Quartile
winner_quartiles <-
    winner_quartiles[
        ,
        .(mean_prop = mean(prop)),
        by = (population_quartile)
    ][order(population_quartile)]

saveRDS(
    winner_quartiles,
    "~/summary-output/quartile_population_shares.rds"
)

################################################################################
# Table A.1 inputs
################################################################################

# - PRE-TAX HOUSEHOLD WINNINGS DISTRIBUTIONS (2016 USD)
# - POST-TAX PER-ADULT WINNINGS DISTRIBUTIONS (2016 USD)
# - ANNUITIZED VERSION OF POST-TAX PER-ADULT WINNINGS DISTRIBUTIONS (2016 USD)

winners <- readRDS("~/population-crossection-data/lottery-data.rds")
our_distributions <-
    winners[
        ,
        .(
            age_case,
            L_multiperiod_nonequiv_pretax,
            L_multiperiod,
            L_ann_multiperiod
        )
    ]
saveRDS(
    our_distributions,
    "~/summary-output/our-distributions.rds"
)

# Housekeeping
winners <- NULL
our_distributions <- NULL
rm(our_distributions, winners)

################################################################################
# Table A.2 inputs
################################################################################

summary_table_TvC <-
    ES_data_omitted_year[
        ,
        .(
            mean_female = weighted.mean(x = female, w = cohort_weight),
            mean_age = weighted.mean(x = age, w = cohort_weight),
            mean_wages = weighted.mean(x = db_w2_wages, w = cohort_weight),
            mean_employment = weighted.mean(x = has_w2_wages, w = cohort_weight),
            mean_married = weighted.mean(x = married_ch, w = cohort_weight),
            mean_home = weighted.mean(x = db_homeowner, w = cohort_weight),
            mean_lottery_winning = weighted.mean(x = L_multiperiod, w = cohort_weight)
        ),
        by = .(treated)
    ]
summary_table_TvC[, count := unique_units]

saveRDS(
    summary_table_TvC,
    "~/summary-output/covariates_treated_vs_control_dt.rds"
)

# Housekeeping
summary_table_TvC <- NULL
ES_data_omitted_year <- NULL
rm(summary_table_TvC, ES_data_omitted_year)